* ----------------------------------------------------------------------------
* Data cleaning
* Load prowess_wits_IO.dta from $clean and apply the reporting-period filters.
* ----------------------------------------------------------------------------
use "$clean/prowess_wits_IO.dta", clear

rename products_cocode1 co_code1

* Count of rows sharing the same co_code1 / prod_date3 combination.
by co_code1 prod_date3, sort: gen nmbr_prods = _N

* Split prod_date3 by character position: 1-4, 5-6 and 7-8.
gen date_string = prod_date3
gen year2  = substr(date_string, 1, 4)
gen month2 = substr(date_string, 5, 2)
gen day2   = substr(date_string, 7, 2)

* Keep rows that report 12 months and whose month substring is "03".
keep if ann_rep_months4 == 12 & month2 == "03"

* NOTE(review): the substring created above is named year2, but the variable
* converted here is `year` -- confirm that `year` already exists in the loaded
* file and is the intended year variable.
destring year, replace


***MOVE ME TO DATA CLEANING
* Builds a co_code1-year lookup with two indicators, first_year and last_year,
* and saves it to "$working/first_last_years.dta". Everything runs between
* preserve and restore, so the dataset in memory is the same afterwards.
* The indicators are 1 for a company's minimum/maximum observed year and also
* for observed years that directly follow/precede a year with no observation.
preserve
	// Reduce to one row per co_code1-year combination; the count is not used.
	contract co_code1 year
	drop _freq
	// Per-company minimum and maximum of the observed years.
	bysort co_code1: egen min_year = min(year)
	bysort co_code1: egen max_year = max(year)
	// Initial indicators: 1 only in the company's min / max year, 0 otherwise.
	gen first_year = min_year == year
	gen last_year = max_year == year
	sort co_code1 year
	// Declare the panel and add rows for every absent company-year; the added
	// rows carry missing values in min_year, first_year and last_year.
	xtset co_code1 year
	tsfill, full
	// An observed year whose following year has missing last_year (i.e. is a
	// filled-in row) also counts as a last year. The co_code1 comparison
	// excludes the final row of each company.
	replace last_year = 1 if missing(f.last_year) & co_code1 == co_code1[_n+1]
	// Undo the change on filled-in rows (identified by missing min_year).
	replace last_year = . if missing(min_year) 
	// With year descending, [_n+1] is the preceding year of the same company:
	// an observed year preceded by a row with missing first_year also counts
	// as a first year.
	gsort co_code1 -year
	replace first_year = 1 if missing(first_year[_n+1]) & co_code1 == co_code1[_n+1]
	replace first_year = . if missing(min_year) 
	sort co_code1 year
	// Drop the filled-in rows and the helper variables before saving.
	drop if missing(min_year)
	drop min_year max_year
	save "$working/first_last_years.dta", replace
restore


* Sample restrictions based on energy and quantity revenue shares.
*
* Rows are kept when energy_revenue_share is positive, or when the row is a
* single-product row (nmbr_prods==1) reporting a zero share.
* NOTE(review): in Stata a missing value compares greater than any number, so
* `energy_revenue_share > 0` and `energy_revenue_share > 0.75` below are also
* true for rows with a missing share, and `quantity_revenue_share <= 0.75` is
* false for a missing share. Confirm whether rows with missing shares are
* meant to stay in the sample.
keep if energy_revenue_share > 0 | (nmbr_prods==1 & energy_revenue_share==0)

* For single-product rows with a zero share, recompute energy_intensity and
* then set the share to 1. The energy_intensity update must run FIRST: both
* statements test energy_revenue_share==0, so overwriting the share with 1
* beforehand would leave the energy_intensity condition with no matching rows.
replace energy_intensity = energy_cons_quantity/production if nmbr_prods==1 & energy_revenue_share==0
replace energy_revenue_share = 1 if nmbr_prods==1 & energy_revenue_share==0

sum quantity_revenue_share energy_revenue_share, detail

* Keep rows with an energy revenue share above 0.75, then drop multi-product
* rows whose quantity revenue share is 0.75 or below.
keep if energy_revenue_share > 0.75
drop if quantity_revenue_share <=0.75 & nmbr_prods>1

* Natural logs of production, sales quantity and sales value, plus the log of
* the unit value (sales value divided by sales quantity).
gen q_m      = ln(production)
gen q_s      = ln(sales_quantity)
gen r        = ln(sales_value)
gen ln_price = ln(sales_value / sales_quantity)


* Merge perpetual_inventory_capital on co_code1 and year; rows that exist only
* in the using file are not kept.
merge m:1 co_code1 year using "$working/perpetual_inventory_capital", keep(master match)
drop _merge

* Merge wholesale_priceindex on year, again discarding using-only rows.
merge m:1 year using "$working/wholesale_priceindex", keep(master match)
drop _merge

* Rescale the index so that a value of 100 becomes 1.
replace wholesale_priceindex = wholesale_priceindex / 100

summarize wholesale_priceindex, detail


* Log inputs. l and m are divided by wholesale_priceindex before taking logs;
* k, k_alt and e are logged as they are.
gen k     = ln(micro_K)
gen k_alt = ln(K_base)
gen l     = ln(sa_compensation_to_e_160 / wholesale_priceindex)
gen m     = ln(sa_rawmat_stores_spa_146 / wholesale_priceindex)
gen e     = ln(energy_cons_quantity)

* Drop rows where any of r, k or l is missing.
drop if missing(r, k, l)

* Multi-product rows additionally need a non-missing production value.
drop if nmbr_prods > 1 & missing(production)

* NOTE(review): this section groups on `co_code`, while earlier statements use
* `co_code1` -- confirm both refer to the intended company identifier.
egen product_id = group(co_code product_name5)

* shares = a row's energy_intensity * production as a fraction of the
* co_code-year total; rows with nmbr_prods == 1 are assigned a share of 1.
gen ei_q = energy_intensity * production
by co_code year, sort: egen energy_denom = total(ei_q)
gen shares = cond(nmbr_prods == 1, 1, ei_q / energy_denom)

drop if shares == .

* Row counts per nic_08_2dig code and per co_code-year, both taken before the
* two industry codes below are removed.
by nic_08_2dig, sort: gen total_ind_obs = _N

by co_code year, sort: gen cleaned_number_prods = _N

* Remove nic_08_2dig codes "64" and "46".
drop if inlist(nic_08_2dig, "64", "46")

tab nic_08_2dig, sort

tab cleaned_number_prods

* Each row's sales_value as a fraction of the co_code-year total.
by co_code year, sort: egen rev_denom = total(sales_value)
gen revenue_share = sales_value / rev_denom

* Number of rows per product_id.
by product_id, sort: gen panel_length = _N
tab panel_length

* k and l shifted by the log of shares, plus the log share itself.
gen k_j       = k + ln(shares)
gen l_j       = l + ln(shares)
gen ln_shares = ln(shares)

* Group identifier for each product_name5 / product_unit combination.
egen up12 = group(product_name5 product_unit)

* Restrict to the listed nic_08_2dig codes (split across two inlist() calls
* to stay within the argument limit for string arguments).
keep if inlist(nic_08_2dig, "10", "13", "17", "20", "21", "22", "23") ///
      | inlist(nic_08_2dig, "24", "25", "26", "27", "28", "29")

gen ln_IND = ln(IND_CHN)
gen ln_IVS = ln(LMI_CHN_EXCL_IND)

* For each investment series: its log, an indicator equal to 1 when the series
* is positive and not equal to ".", and the log multiplied by the indicator,
* with missing products replaced by 0.
foreach inv in micro_inv inv_base {
	gen ln_`inv' = ln(`inv')
	gen D_`inv' = (`inv' > 0 & `inv' != .)

	gen D_ln_`inv' = D_`inv' * ln_`inv'
	replace D_ln_`inv' = 0 if D_ln_`inv' == .
}

* l*x - 0.5*(l^2 + x^2), first with x = k ...
gen I_sq = l * k - 0.5 * (l^2 + k^2)

* ... then with x = the indicator-times-log investment terms, each also
* multiplied by its indicator.
foreach inv in micro_inv inv_base {
	gen I_sq_`inv' = l * (D_ln_`inv') - 0.5 * (l^2 + (D_ln_`inv')^2)
	gen I_sq_`inv'_D = I_sq_`inv' * D_`inv'
}

* Natural logs of the four IPT variables.
local ipt_vars wIPT_IND_CHN wIPT_LMI_CHN_EXCL_IND rwIPT_IND_CHN rwIPT_LMI_CHN_EXCL_IND
foreach v of local ipt_vars {
	gen ln_`v' = ln(`v')
}

save "$working/prowess_wits_estimation_ronly_kl.dta", replace
